Map election results to regions.
Assumes you have huffpostdata/election-2012-results cloned at ../../election-2012-results.
The code below is set up for the 2010 regions (county_region_10); just change county_region_10 to county_region_00 below to do the 2000 regions instead.
In [1]:
from __future__ import division, print_function
%matplotlib inline
In [2]:
import numpy as np
import pandas as pd
import re
import six
In [3]:
from IPython.display import display
In [4]:
import sys
sys.path.append('..')
In [5]:
from pummeler.data import geocode_data
In [6]:
# Lookup from the keys of the 'county_region_10' table to its `region` column.
# The keys are used below as county FIPS strings (e.g. '02...' for Alaska).
# NOTE(review): geocode_data is a project helper; presumably it returns a
# DataFrame indexed by county FIPS -- confirm.
county_to_region = geocode_data('county_region_10').region.to_dict()
In [7]:
from glob import glob
First, handle Alaska specially:
In [8]:
# Sanity check: every key starting with '02' (Alaska) must map to the same
# region, because the Alaska precinct results are collapsed below into a single
# statewide row that is assigned to just one Alaska FIPS code.
# dict.items() is used instead of dict.iteritems() so the check also runs on
# Python 3, where iteritems() no longer exists.
assert len({region for fips, region in county_to_region.items()
            if fips.startswith('02')}) == 1
In [9]:
# Alaska's results are read from their own precinct-level file (ak_precincts.csv)
# in the cloned election-2012-results repository.
ak_precincts = pd.read_csv('../../election-2012-results/data/ak_precincts.csv')
In [10]:
# Add up the precinct rows per candidate, giving one statewide row each.
ak = ak_precincts.groupby(ak_precincts['candidate']).sum().reset_index()
# Tag the rows so they line up with the per-state files loaded later: a state
# label, one Alaska FIPS code (any key with the '02' prefix), and a county name.
ak['state'] = 'ak'
ak['fips'] = next(fips for fips in county_to_region if fips[:2] == '02')
ak['county'] = 'All of Alaska'
In [11]:
# Display the aggregated Alaska rows for a visual check.
ak
Out[11]:
In [12]:
# Start from the statewide Alaska rows, then append every two-character
# per-state results file (??.csv).
bits = [ak]
# glob() returns paths in arbitrary, filesystem-dependent order; sorting keeps
# the row order of `election` (and every output derived from it) reproducible.
for f in sorted(glob('../../election-2012-results/data/??.csv')):
    # Read FIPS codes as strings so that they compare equal to the string keys
    # of county_to_region (leading zeros such as '02' must be kept).
    piece = pd.read_csv(f, dtype={'fips': str})
    # The two characters in front of '.csv' in the file name label the state.
    piece['state'] = f[-6:-4]
    bits.append(piece)
election = pd.concat(bits)
In [13]:
# Canonical spelling for each known variant of a candidate name. Keys are the
# cleaned strings produced inside rewrite() before this lookup is applied.
reps = {
    'goode': 'virgil goode',
    'obama': 'barack obama',
    'johnson': 'gary johnson',
    'romney': 'mitt romney',
    'stein': 'jill stein',
    'virgil h. goode': 'virgil goode',
    'virgil h. goode jr.': 'virgil goode',
    'gary e. johnson': 'gary johnson',
    'write in': 'write-in',
    'write-ins': 'write-in',
    'hoefling': 'tom hoefling',
    'obama barack': 'barack obama',
    'stein jill': 'jill stein',
    'romney mitt': 'mitt romney',
    'johnson gary': 'gary johnson',
    'jill stein write-in': 'jill stein',
    'hoefling (write-in)': 'tom hoefling',
    'tom hoeffling': 'tom hoefling',
    'alexander': 'stewart alexander',
    # The ' and' cut in rewrite() also truncates '... "rocky" anderson', so the
    # canonical form for that candidate is the truncated string itself.
    'ross c. "rocky"': 'ross c. "rocky"',
    'ross c. rocky': 'ross c. "rocky"',
    'ross c.': 'ross c. "rocky"',
    'rocky': 'ross c. "rocky"',
    'paul': 'ron paul',
    'ron paul write-in': 'ron paul',
    'write-in**': 'write-in',
    'clymer': 'james clymer',
    'roth': 'cecil james roth',
    'prokopich': 'barbara prokopich',
    'barbara a. prokopich': 'barbara prokopich',
    'kevin m. thorne': 'kevin thorne',
    'thorne': 'kevin thorne',
}


def rewrite(s):
    """Normalize a raw candidate string to a canonical lowercase name.

    Steps:
      1. Lowercase the string.
      2. For each of '/', ',', '(', ' and', ' for president' (in that order),
         cut the string at the first occurrence, dropping running mates,
         party suffixes and similar trailing text.
      3. Strip the ends and collapse every run of whitespace to one space.
      4. Map known variants to their canonical spelling via ``reps``;
         unknown names are returned as cleaned.

    Parameters
    ----------
    s : str
        Candidate name as it appears in the results file.

    Returns
    -------
    str
        The normalized candidate name.
    """
    s = s.lower()
    for x in ['/', ',', '(', ' and', ' for president']:
        p = s.find(x)
        if p != -1:
            s = s[:p]
    # split()/join trims the ends and collapses repeated whitespace, so names
    # such as 'gary  johnson' still hit the lookup table. The previous
    # replace(' ', ' ') replaced a space with itself and therefore did nothing.
    s = ' '.join(s.split())
    return reps.get(s, s)
# Normalized candidate name for every row, computed element-wise by rewrite().
election['cand'] = election['candidate'].map(rewrite)
In [14]:
# Total votes per normalized candidate name, largest total first.
cand_votes = (
    election.groupby('cand')['votes']
    .sum()
    .sort_values(ascending=False)
)
In [15]:
# Show the 50 names with the most votes, to spot spellings that still need an
# entry in `reps`.
cand_votes.head(50)
Out[15]:
In [16]:
# Party label per row: the four tracked candidates get their own code, and
# every other name falls into the catch-all 'oth' bucket.
election['party'] = election['cand'].map({
    'barack obama': 'D',
    'mitt romney': 'R',
    'gary johnson': 'L',
    'jill stein': 'G',
}).fillna('oth')
In [17]:
# National vote totals per party label.
election.groupby('party')['votes'].sum()
Out[17]:
Slightly disagrees with https://en.wikipedia.org/wiki/United_States_presidential_election,_2012: they say Obama 65,915,795, Romney 60,933,504. Not sure how we got too many votes for Romney there; maybe Wikipedia miscounted?
In [18]:
# FIPS values present in the election data that have no region mapping.
set(election.fips).difference(county_to_region)
Out[18]:
In [19]:
# Rows of the election data that carry no FIPS code at all.
election[election['fips'].isnull()]
Out[19]:
UOCAVA = The Uniformed and Overseas Citizens Absentee Voting Act. Ignore these.
In [20]:
# Mapped counties that never appear in the election data. Alaska ('02...') is
# skipped because its results were folded into a single statewide row.
{
    code
    for code in set(county_to_region).difference(election.fips)
    if not code.startswith('02')
}
Out[20]:
15005 is Kalawao County, Hawaii, which has a population of 89 and is accessible only by mule trail. Its votes are counted under Maui (15009), and they're in the same PUMA anyway:
In [21]:
# True when FIPS 15005 and FIPS 15009 map to the same region.
county_to_region['15005'] == county_to_region['15009']
Out[21]:
In [22]:
# Vote totals per (region, party), pivoted to one row per region and one column
# per party.
# Grouping on both keys at once always yields a MultiIndexed Series, so
# .unstack() reliably produces the region x party table. Going through
# groupby(...).apply() with a Series-returning function gives a DataFrame
# instead whenever every region happens to have the same set of parties, and
# .unstack() would then turn that frame into a Series.
# Rows whose FIPS has no entry in county_to_region (including missing FIPS)
# become NaN after .map() and are dropped by groupby.
election_region = (
    election.groupby([election.fips.map(county_to_region), election.party])
    .votes.sum()
    .unstack()
)
In [23]:
# Prefix every party column with 'votes_' (e.g. 'D' -> 'votes_D') and name the
# index after what it holds.
election_region.columns = list(map('votes_{}'.format, election_region.columns))
election_region.index.name = 'region'
In [24]:
# A party with no rows in a region is missing after the pivot; treat that as
# zero votes and store all counts as integers.
election_region = election_region.fillna(0).astype('int')
In [25]:
# Preview the first rows of the per-region vote table.
election_region.head()
Out[25]:
In [26]:
# Write the per-region vote table as a gzip-compressed CSV into the current
# working directory.
election_region.to_csv('2012-by-region.csv.gz', compression='gzip')